Author

Joe

Published

March 2, 2024

Code
# Record when the notebook starts; the "Run time" chunk at the end subtracts
# this from the current time to report the total run time.
start_time <- Sys.time()

suppressPackageStartupMessages(library(ggpath))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(tidyverse))

# Base colours. All except `gry` come from the colour-blind-safe palette in:
# Wong, B. Points of view: Color blindness. Nat Methods (2011).
bla <- "#000000"
blu <- "#0072b2"
grb <- "#56b4e9"
lir <- "#cc79a7"
gre <- "#009e73"
red <- "#d55e00"
org <- "#e69f00"
yel <- "#f0e442"
gry <- "#BBBBBB"

# Default ordering of the colours and the point shapes used for plots.
jam_cols <- c(blu, red, gre, org, grb, lir, gry, bla)
jam_shapes <- c(21, 22, 23, 24, 25)

# Make ggplot2 use `jam_cols` for discrete colour/fill scales, and discourage
# scientific notation when numbers are printed.
options(
  ggplot2.discrete.colour = jam_cols,
  ggplot2.discrete.fill = jam_cols,
  scipen = 10L
)

# Shared plot theme: theme_minimal() with larger text and some space between
# the axis titles and the axis.
jam_theme <- theme_minimal() +
  theme(
    text = element_text(size = 14),
    axis.text = element_text(size = 12),
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0))
  )

# Create color palette based on lineup image @ https://coolors.co/image-picker
my_cols <- c(
  "#52BFEC", "#AA1880", "#EC0059", "#08BCDF",
  "#4C1064", "#FF00BC", "#2249CD", "#53007D",
  "#FF6B02", "#B319B2", "#EAE100", "#BF068F"
)

Summary

This notebook shows how I queried the Spotify and Last.fm APIs for data on the artists playing EDC 2024. I wanted to find out which artists on the lineup are the most popular.

EDC 2024 Lineup

EDC Lineup 2024

Convert lineup image to text

Create a text list of EDC artists from the lineup image using imagetotext.io.

Code
# Artist names were extracted from the EDC lineup PNG with
# https://www.imagetotext.io/ and saved as a tab-separated text file.
# Read that file, drop duplicated rows and sort by artist name.
edc_artists <- arrange(
  unique(
    read_tsv("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artists_text_from_png.txt")
  ),
  artist
)

#edc_artists

Collect Artist Data

Spotify

Access Spotify API using package spotifyr

You need to set up a developer account with Spotify (https://developer.spotify.com/) to access their Web API. It is quick and easy.

Code
#install.packages("spotifyr")
library(spotifyr)

# Request an access token from Spotify (see the spotifyr documentation for how
# credentials are supplied).
access_token <- get_spotify_access_token()


# Use search_spotify() to find spotify artist ids from artist names.
# Only an exact (case-insensitive) name match is accepted; everything else is
# left as NA.
# No ID for Domina, Hint of Lavender, Marlie, VUIIIGUR

# Start from NA so artists without an exact match simply stay missing.
spotify_artist_id <- rep(NA_character_, length(edc_artists$artist))

for (i in seq_along(edc_artists$artist)) {

  i_search_spotify <- search_spotify(edc_artists$artist[i])
  i_candidates <- i_search_spotify$artists$items

  # Upper-case BOTH sides so the comparison does not depend on how the lineup
  # file happens to be capitalised. match() gives the position of the first
  # hit, or NA when there is none (including when the search returned nothing).
  exact_artist_name_match <- match(
    toupper(edc_artists$artist[i]),
    toupper(i_candidates$name)
  )

  if (!is.na(exact_artist_name_match)) {

    spotify_artist_id[[i]] <- i_candidates$id[exact_artist_name_match]

  } else {

    cat(paste0("\nNo exact match for artist name: ", edc_artists$artist[i], "\n\n"))

    # Alternative (disabled): fall back to the first search result instead of
    # leaving the ID as NA.
    # spotify_artist_id[[i]] <- i_search_spotify$artists$items$id[1]
    # cat(paste0("\nNo exact match for artist name: ", edc_artists$artist[i],
    #            "\n", "Using: ", i_search_spotify$artists$items$name[1],
    #            "\n", i_search_spotify$artists$items$external_urls.spotify[1], "\n\n"))
  }

}

edc_artists$spotify_artist_id <- spotify_artist_id


# Use get_artist() to get genres, followers, popularity and an image URL for
# every artist that has a Spotify ID.

# Preallocate with typed NAs: artists without an ID (or without a given field)
# stay NA, and numeric fields are stored as numbers from the start instead of
# being round-tripped through character.
n_artists  <- length(edc_artists$artist)
genres     <- rep(NA_character_, n_artists)
followers  <- rep(NA_real_, n_artists)
popularity <- rep(NA_real_, n_artists)
image_url  <- rep(NA_character_, n_artists)

for (i in seq_along(edc_artists$spotify_artist_id)) {

  # No Spotify ID was found for this artist: leave all fields as NA.
  if (is.na(edc_artists$spotify_artist_id[i])) {
    next
  }

  i_artist_info <- get_artist(edc_artists$spotify_artist_id[i])

  # Collapse the genre list into one comma-separated string; an artist with no
  # genres yields "", which is recorded as NA rather than an empty string.
  i_genres <- paste0(i_artist_info$genres, collapse = ",")
  if (nzchar(i_genres)) {
    genres[[i]] <- i_genres
  }

  followers[[i]]  <- i_artist_info$followers$total
  popularity[[i]] <- i_artist_info$popularity

  # Keep the first image URL when there is one.
  i_image <- i_artist_info$images$url[1]
  if (!is.null(i_image)) {
    image_url[[i]] <- i_image
  }

}

edc_artists$genres     <- genres
edc_artists$followers  <- followers
edc_artists$popularity <- popularity
edc_artists$image_url  <- image_url

Last.fm

Access Last.fm API using package lastfmR

Code
#devtools::install_github("ppatrzyk/lastfmR")
library(lastfmR)
# masks get_tracks()


# Ask Last.fm for information on every lineup artist and hold it as a tibble.
lastfm_artist_info <- tibble(
  get_artist_info(artist_vector = edc_artists$artist)
)

# Combine with the Spotify data. No `by` is given, so the join uses whatever
# columns the two tables have in common.
edc_artists <- edc_artists |>
  full_join(lastfm_artist_info)

# Uncomment to save the combined table to disk:
# write.table(edc_artists, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt", row.names = F, quote = F, sep = "\t")

Spotify Followers

Code
# Load the saved artist table (same path as the commented-out write.table()
# call in the Last.fm chunk), so plotting works from the file on disk.
edc_artists <- read_tsv(
  file = "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt"
)

# Horizontal bar-style chart of the top-ranked artists for one metric.
#
# Arguments:
#   column_name     Name (a single string) of the column in `data` to rank and
#                   plot by, e.g. "followers".
#   top             Number of artists to show (the `top` highest values).
#   plot_title      Plot title.
#   include_images  "true" (or TRUE) to draw each artist's image, taken from
#                   the `image_url` column, at the end of its bar; anything
#                   else draws no images. The string form is kept for
#                   existing callers.
#   data            Data frame with an `artist` column, the `column_name`
#                   column and, when images are requested, `image_url`.
#                   Defaults to the global `edc_artists`.
#
# Uses the globals `my_cols` (bar colours) and `jam_theme` (base theme).
# Returns the ggplot object.
plot.top.artists <- function(column_name, top, plot_title,
                             include_images = "false", data = edc_artists) {

  stopifnot(
    is.character(column_name),
    length(column_name) == 1L,
    column_name %in% names(data)
  )

  # Accept both the historical "true"/"false" strings and real logicals.
  show_images <- identical(tolower(as.character(include_images)), "true")

  # Highest values first, then keep the requested number of rows.
  i_plot <- data |>
    arrange(desc(.data[[column_name]])) |>
    head(top)

  # Recycle the palette so there is a colour for every bar, however many
  # artists are requested (never fewer than one full palette).
  bar_cols <- rep_len(my_cols, max(nrow(i_plot), length(my_cols)))

  p1 <- ggplot(
    i_plot,
    aes(reorder(artist, .data[[column_name]]), .data[[column_name]])
  ) +
    # One thick segment per artist from 0 to its value; x is inherited.
    geom_segment(
      aes(
        xend = reorder(artist, .data[[column_name]]),
        y = 0,
        yend = .data[[column_name]],
        color = artist
      ),
      linewidth = 3
    ) +
    # clip = "off" lets images drawn at the bar ends extend past the panel.
    coord_flip(clip = "off") +
    scale_color_manual(values = bar_cols) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none",
      text = element_text(size = 20, face = "bold", color = "white"),
      axis.text.x = element_text(size = 10, color = "white"),
      axis.text.y = element_text(size = 13, color = "white"),
      plot.title = element_text(face = "bold", color = "white"),
      plot.background = element_rect(fill = "#5E61AF"),
      plot.margin = margin(1, 1, 1.5, 1.2, "cm")
    ) +
    ggtitle(plot_title)

  if (show_images) {
    p1 <- p1 + geom_from_path(aes(path = image_url), width = 0.052)
  }

  p1
}

# Ten artists with the most Spotify followers, with artist images.
plot.top.artists(
  column_name = "followers",
  top = 10,
  plot_title = "Top 10 EDC artists with the most followers on Spotify",
  include_images = "true"
)

Code
# Thirty artists with the most Spotify followers, without images.
plot.top.artists(
  column_name = "followers",
  top = 30,
  plot_title = "Top 30 EDC artists with the most followers on Spotify",
  include_images = "false"
)

Spotify “Popularity”

Code
# Ten artists with the highest Spotify popularity value, with artist images.
plot.top.artists(
  column_name = "popularity",
  top = 10,
  plot_title = "Top 10 most popular EDC artists according to Spotify",
  include_images = "true"
)

Code
# Thirty artists with the highest Spotify popularity value, without images.
plot.top.artists(
  column_name = "popularity",
  top = 30,
  plot_title = "Top 30 most popular EDC artists according to Spotify",
  include_images = "false"
)

Last.fm Global Listeners

Code
# Ten artists with the most Last.fm listeners, with artist images.
plot.top.artists(
  column_name = "global_listeners",
  top = 10,
  plot_title = "Top 10 artists with the most listeners on Last.fm",
  include_images = "true"
)

Code
# Thirty artists with the most Last.fm listeners, without images.
plot.top.artists(
  column_name = "global_listeners",
  top = 30,
  plot_title = "Top 30 artists with the most listeners on Last.fm",
  include_images = "false"
)

Notes

Run time

Code
# Elapsed time since `start_time` was recorded at the top of the notebook.
difftime(Sys.time(), start_time)
Time difference of 10.07798 secs

Session

Code
# Print the R version, platform and package versions used for this run.
sessionInfo()
R version 4.3.2 (2023-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19045)

Matrix products: default


locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
 [5] purrr_1.0.2     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
 [9] tidyverse_2.0.0 plotly_4.10.3   ggplot2_3.4.4   ggpath_1.0.1   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4         generics_0.1.3     stringi_1.8.3      hms_1.1.3         
 [5] digest_0.6.33      magrittr_2.0.3     evaluate_0.23      grid_4.3.2        
 [9] timechange_0.2.0   fastmap_1.1.1      jsonlite_1.8.8     httr_1.4.7        
[13] fansi_1.0.6        viridisLite_0.4.2  scales_1.3.0       lazyeval_0.2.2    
[17] cli_3.6.2          crayon_1.5.2       rlang_1.1.2        bit64_4.0.5       
[21] munsell_0.5.0      withr_2.5.2        cachem_1.0.8       yaml_2.3.8        
[25] parallel_4.3.2     tools_4.3.2        tzdb_0.4.0         memoise_2.0.1     
[29] colorspace_2.1-0   curl_5.2.0         vctrs_0.6.5        R6_2.5.1          
[33] magick_2.8.1       lifecycle_1.0.4    bit_4.0.5          htmlwidgets_1.6.4 
[37] vroom_1.6.5        pkgconfig_2.0.3    pillar_1.9.0       gtable_0.3.4      
[41] Rcpp_1.0.11        data.table_1.14.10 glue_1.6.2         xfun_0.41         
[45] tidyselect_1.2.0   rstudioapi_0.15.0  knitr_1.45         farver_2.1.1      
[49] htmltools_0.5.7    labeling_0.4.3     rmarkdown_2.25     compiler_4.3.2